% Estimate (G)PME and alphas for individual PE funds
%
% GPME uses the exponential-affine factor model:
%   M(t+1) = exp[f(t+1) * b]

% The constructed variables are defined as follows:
%   PEfund_fundid   = Nx1 vector with fund identifiers
%   PEfund_firmid   = Nx1 vector with firm identifiers
%   PEfund_seqnr    = Nx1 vector with fund sequence number for a given firm
%   PEfund_vintage  = Nx1 vector with fund vintage years
%   PEfund_size     = Nx1 vector with size of fund in $mln for each fund 1..N
%   PEfund_size1990 = Nx1 vector with size of fund in 1990 mlns U.S. dollars for each fund 1..N (using CPIAUCSL from FRED to correct for inflation)
%   PEfund_nextfundid   = Nx1 vector with fund id of next fund of same type by same GP (NaN if there is no next fund of same PE firm and type in the data)
%   PEfund_nextvintage  = Nx1 vector with next fund's vintage year of same type by same GP (NaN if there is no next fund of same PE firm and type in the data)
%   PEfund_nextsize     = Nx1 vector with size in $mln of next fund of same type by same GP (NaN if there is no next fund of same PE firm and type in the data)
%   PEfund_nextsize1990 = Nx1 vector with size of next fund of same type by same GP, in 1990 mlns U.S. (NaN if there is no next fund of same PE firm and type in the data)
%   PEfund_numCF    = Nx1 vector with number of observed net cash flow dates (calls, distributions, and the final NAV) for each fund 1..N
%   PEfund_type     = Nx1 vector with fund type (VC = 1, Buyout = 2, Expansion Capital ("Growth Equity") = 3)
%   PEfund_subtype  = Nx1 vector with type of VC (1 = balanced; 2 = early-stage; 3 = late-stage)
%   PEfund_CF       = NxJ matrix of cash flows, ending with the final observed NAV (if not zero). Padded with zeros at the end of each row.
%   PEfund_datenums = NxJ matrix of dates of cash flows, in Matlab datenum format. Padded with zeros at the end of each row.
%   PEfund_T        = NxJ with time since fund firstdate and the date of the corresponding entry in PEfund_CF (in years)
%   PEfund_finalNAV = Nx1 vector of last stated NAV (if = 0 then fund is fully liquidated)
%   PEfund_liq100   = Nx1 vector, =1 if fund is fully (100%) liquidated (final NAV = 0 and fund is not being raised)
%   PEfund_liq95    = Nx1 vector, =1 if final NAV < 5% of fund size (and fund is not being raised)
%   PEfund_liq90    = Nx1 vector, =1 if final NAV < 10% of fund size (and fund is not being raised)
%   PEfund_TVPI     = Nx1 vector with fund TVPIs
%   PEfund_PME      = Nx1 vector with fund PMEs (calculated the traditional Kaplan-Schoar way)
%   PEfund_IRR      = Nx1 vector with fund IRRs (calculated the Preqin way, i.e., capping cash flow times at 10 years from initial cashflow)
%   PEfund_Rf       = NxJ matrix of risk-free asset return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic, compounded 1-month risk-free rates)
%   PEfund_Rm       = market return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_Rsmb     = SMB return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_Rhml     = HML return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RS       = return on small-firms portfolio, computed as compounded return on (SL+SM+SH)/3
%   PEfund_RS2      = return on small-firms portfolio, computed as compounded return on (rf+rSMB)
%   PEfund_RSL      = Small-Low B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RSM      = Small-Medium B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RSH      = Small-High B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RBL      = Big-Low B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RBM      = Big-Medium B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RBH      = Big-High B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RmVar    = NxJ matrix of variance of market returns since fund inception, from daily data (used for computing individual fund alphas and betas)
%
% NB: all returns are arithmetic, gross returns, i.e. 1.05 means 5% return.
%
% A fund is considered VC if its "Asset_Class" = "Venture Capital"
% A fund is considered Buyout if its "Sub_Asset_Class" is "Buyout"

clear all
close all
clc

debug       = true;     % true = use fake data file; false = use real Burgiss data

maxdate     = datenum('2022/03/31');   % 2022Q1 data; delete any dates after quarter-end

maxfunds    = 3500;     % max # funds in data (used to initialize variables)
maxnumCF    = 500;      % max # cash flows per fund (used to initialize variables)

if debug
    indir   = '';
    inname  = 'FakeData.xlsx';
    outdir  = indir;
else
    indir   = 'D:\Projects\Arthur\Exchange\';
    inname  = 'TransData2022Q1.csv';
    outdir  = 'D:\Projects\Arthur\Exchange\Out\';
end

% initialize diary
if exist([outdir 'Burgiss_log_' datestr(date,'yyyymmdd') '.txt'],'file')
    delete([outdir 'Burgiss_log_' datestr(date,'yyyymmdd') '.txt']);    % delete diary file, if it exists
end
diary([outdir 'Burgiss_log_' datestr(date,'yyyymmdd') '.txt']);
diary on            % this will save the screen output

% Set options for treatment of data
includeNAV  = 1;    % 0 = do not include final NAV; 1 = include final NAV

try                 % catch errors so the diary can be closed and saved


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load Burgiss data 
%
% Contains the variables:
%     FundID, TransactionID, Inception_Date, Vintage, 
%     Asset_Class1, Asset_Class2, Asset_Class3, Geographic_Focus, 
%     Sub_Geographic_Focus, Currency, Type, Fund_Size, Date, Amount, FirmID
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic

% read the PE fund cash flow data 
% NB: MAKE SURE THIS FILE IS SORTED FIRST BY FUND ID, SECOND BY TRANSACTION DATE!
if debug
    % read the PE fund cash flow data text file
    inCF = readtable('FakeData.xlsx');
    inCF_fundid     = inCF.FundID;              % fund identifier
    inCF_vintage    = inCF.Vintage;             % vintage year
    inCF_class      = inCF.Class;               % VC, Buyout, ...
    inCF_subclass   = inCF.SubClass;            % for VC: early stage, late stage, generalist
    inCF_subfocus   = inCF.SubFocus;            % geography: to filter out non-US funds
    inCF_size       = inCF.FundSizeUSD;         % fund size
    inCF_transdate  = inCF.Date;                % calendar dates of contributions, distributions, valuations
    inCF_transtype  = inCF.TransactionType;     % type of transaction (contribution, distribution, valuation)
    inCF_transamt   = inCF.AmountUSD;           % amount of transaction in USD
    inCF_firmid     = inCF.FirmID;              % GP identifier
else
    [inCF_fundid, ~, inCF_vintage, ~, inCF_class, inCF_subclass, ~, ~, inCF_subfocus, ~, ~, ~, inCF_size, inCF_transdate, inCF_transtype, ~, inCF_transamt, inCF_firmid, ~] ...
        = textread([indir inname],'%n%s%n%s%s%s%s%s%s%s%s%n%n%s%s%f%f%n%s','headerlines',1,'delimiter',',','emptyvalue',NaN);
end

% read 5 factor return data (from Ken French's website)
s = textread('F-F_Research_Data_5_Factors_2x3_daily.csv','','headerlines',4,'delimiter',',','emptyvalue',NaN);
FF_date = s(:,1);
FF_datenum = datenum(floor(s(:,1)/10000), floor(s(:,1)/100)-100*floor(s(:,1)/10000),rem(s(:,1),100));
FF_rmrf = s(:,2)/100;    % daily rm-rf
FF_rSMB = s(:,3)/100;    % daily return to SMB ptf
FF_rHML = s(:,4)/100;    % daily return to HML ptf
FF_rRMW = s(:,5)/100;    % daily return to RMW ptf
FF_rCMA = s(:,6)/100;    % daily return to CMA ptf
FF_rf = s(:,7)/100;      % daily rf
FF_rm = FF_rf + FF_rmrf;

% read size/BM portfolio return data (from Ken French's website)
s = textread('6_Portfolios_2x3_daily.csv','','headerlines',19,'delimiter',',','emptyvalue',NaN);
FF6_datenum = datenum(floor(s(:,1)/10000), floor(s(:,1)/100)-100*floor(s(:,1)/10000),rem(s(:,1),100));
FF6_SL = s(:,2)/100;    % small-low b/m portfolio returns
FF6_SM = s(:,3)/100;    % small-medium b/m portfolio returns
FF6_SH = s(:,4)/100;    % small-high b/m portfolio returns
FF6_BL = s(:,5)/100;    % big-low b/m portfolio returns
FF6_BM = s(:,6)/100;    % big-medium b/m portfolio returns
FF6_BH = s(:,7)/100;    % big-high b/m portfolio returns

% read op/size portfolio return data (from Ken French's website)
s = textread('6_Portfolios_ME_OP_2x3_daily.csv','','headerlines',23,'delimiter',',','emptyvalue',NaN);
FF6MEOP_datenum = datenum(floor(s(:,1)/10000), floor(s(:,1)/100)-100*floor(s(:,1)/10000),rem(s(:,1),100));
FF6_SLop = s(:,2)/100;    % small-low profit portfolio returns
FF6_SMop = s(:,3)/100;    % small-medium profit portfolio returns
FF6_SHop = s(:,4)/100;    % small-high profit portfolio returns
FF6_BLop = s(:,5)/100;    % big-low profit portfolio returns
FF6_BMop = s(:,6)/100;    % big-medium profit portfolio returns
FF6_BHop = s(:,7)/100;    % big-high profit portfolio returns

% read the inflation data (from FRED)
[CPI_date, CPI] = textread([indir 'CPIAUCSL.csv'],'%d%f','headerlines',1,'delimiter',',','emptyvalue',NaN);

% initialize data variables
PEfund_fundid   = zeros(maxfunds,1);
PEfund_firmid   = ones(maxfunds,1)*NaN;
PEfund_seqnr    = ones(maxfunds,1)*NaN;
PEfund_vintage  = zeros(maxfunds,1);
PEfund_size     = zeros(maxfunds,1);
PEfund_size1990 = zeros(maxfunds,1);
PEfund_type     = zeros(maxfunds,1);
PEfund_subtype  = zeros(maxfunds,1);
PEfund_finalNAV = zeros(maxfunds,1);
PEfund_nextfundid   = ones(maxfunds,1)*NaN;
PEfund_nextvintage  = ones(maxfunds,1)*NaN;
PEfund_nextsize     = ones(maxfunds,1)*NaN;
PEfund_nextsize1990 = ones(maxfunds,1)*NaN;
PEfund_numCF    = zeros(maxfunds,1);
PEfund_datenums = zeros(maxfunds,maxnumCF);
PEfund_T        = zeros(maxfunds,maxnumCF);
PEfund_CF       = zeros(maxfunds,maxnumCF);
PEfund_liq90    = zeros(maxfunds,1);
PEfund_liq95    = zeros(maxfunds,1);
PEfund_liq100   = zeros(maxfunds,1);
PEfund_TVPI     = zeros(maxfunds,1);
PEfund_PME      = zeros(maxfunds,1);
PEfund_IRR      = zeros(maxfunds,1);
PEfund_Rf       = zeros(maxfunds,maxnumCF);
PEfund_Rm       = zeros(maxfunds,maxnumCF);
PEfund_Rm2      = zeros(maxfunds,maxnumCF); % for 'levered' PME with 'beta'=2
PEfund_Rsmb     = zeros(maxfunds,maxnumCF);
PEfund_Rhml     = zeros(maxfunds,maxnumCF);
PEfund_Rrmw     = zeros(maxfunds,maxnumCF);
PEfund_Rcma     = zeros(maxfunds,maxnumCF);
PEfund_RS       = zeros(maxfunds,maxnumCF);
PEfund_RS2      = zeros(maxfunds,maxnumCF);
PEfund_RH       = zeros(maxfunds,maxnumCF);
PEfund_RH2      = zeros(maxfunds,maxnumCF);
PEfund_RL       = zeros(maxfunds,maxnumCF);
PEfund_Rmvol    = zeros(maxfunds,maxnumCF);
PEfund_RB       = zeros(maxfunds,maxnumCF);
PEfund_RSL      = zeros(maxfunds,maxnumCF);
PEfund_RSM      = zeros(maxfunds,maxnumCF);
PEfund_RSH      = zeros(maxfunds,maxnumCF);
PEfund_RBL      = zeros(maxfunds,maxnumCF);
PEfund_RBM      = zeros(maxfunds,maxnumCF);
PEfund_RBH      = zeros(maxfunds,maxnumCF);
PEfund_RSLop    = zeros(maxfunds,maxnumCF);
PEfund_RSMop    = zeros(maxfunds,maxnumCF);
PEfund_RSHop    = zeros(maxfunds,maxnumCF);
PEfund_RBLop    = zeros(maxfunds,maxnumCF);
PEfund_RBMop    = zeros(maxfunds,maxnumCF);
PEfund_RBHop    = zeros(maxfunds,maxnumCF);
PEfund_RLop     = zeros(maxfunds,maxnumCF);
PEfund_RHop     = zeros(maxfunds,maxnumCF);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Go through the PE funds one by one
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cnt = 0;    % keep total count of included funds
i   = 1;    % current row in the input file
while (i < length(inCF_fundid))   % keep processing until run out of data
    cur_fundid  = inCF_fundid(i);   % current fund id    
    
    % Determine current fund's starting and ending rows in input data
    strti = i;                % starting row in input data
    while (i <= length(inCF_fundid)) && (inCF_fundid(i) == cur_fundid)
        i = i + 1; 
    end
    endi = i - 1;            % ending row in input data
    
    % Check if fund is VC or Buyout
    isVC = false; if strcmpi(inCF_class(strti), 'Venture Capital'), isVC = true; end
    isBO = false; if strcmpi(inCF_class(strti), 'Buyout'), isBO = true; end
    isGE = false; if strcmpi(inCF_class(strti), 'Expansion Capital'), isGE = true; end
    
    % Determine fund size in 1990 millions of U.S. Dollars
    size1990    = (inCF_size(strti) / 1000000) ...
                    * CPI(CPI_date==1990) / CPI(CPI_date == inCF_vintage(strti));     
%     if ~strcmp(inCF_currency,'USD'), size1990 = NaN; end  % check if in USD
    
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Now process the fund, if it satisfies all of these conditions:
    % 1) VC or Buyout or Growth Equity fund (no Fund of Funds!)
    % 2) Vintage 2012 or earlier
    % 3) >= $5m in AUM in 1990 dollars
    % 4) U.S.-focused
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if (isVC || isBO ||isGE) && (inCF_vintage(strti) <= 2016) && ...
        (size1990 > 5) && strcmp(inCF_subfocus(strti),'United States')
        
        disp(['Processing fund #' num2str(inCF_fundid(strti))]);
    
        % Convert all fund transaction dates into Matlab datenum format.
        % Resort all transactions first by date (earliest to latest), then by
        % transaction type, with valuations last (since any valuations that
        % coincide with distributions are post-distribution).
        transdatenum    = datenum(inCF_transdate(strti:endi));
        transtype       = zeros(endi-strti+1,1);
        % recode transtype to 1 (contribution), 2 (distribution), 3(valuation), for sorting.
        transtype(strcmp('Contribution', inCF_transtype(strti:endi))) = 1;
        transtype(strcmp('Distribution', inCF_transtype(strti:endi))) = 2;
        transtype(strcmp('Valuation', inCF_transtype(strti:endi))) = 3;
        transamt        = inCF_transamt(strti:endi);
        Z               = sortrows([transdatenum transtype transamt], [1 2]); % sort first by date, then by transaction type
        transdatenum    = Z(:,1);
        transtype       = Z(:,2);
        transamt        = Z(:,3);
        
        % Drop any transactions after the data cutoff date (these are incomplete)
        while (transdatenum(end) > maxdate)
            transdatenum(end)   = [];
            transtype(end)      = [];
            transamt(end)       = [];
        end        
               
        % Determine final observed NAV, which is the last observed "Valuation"
        % (transtype = 3) plus/minus any later contributions/distributions.
        % Adjust the date of the final NAV to be the date of the last cash flow.
        j = length(transtype); while (j > 0) && (transtype(j) ~= 3), j = j - 1; end
        if (j > 0)  % catch funds with not a single reported NAV (usually very young funds; not usable)
            finalNAV                = transamt(j);
            finalNAV_datenum        = transdatenum(j);
            j                       = j + 1;
            while (j <= length(transamt))    % process all cash flows after final reported NAV
                if (transamt(j) ~= 0)   % if zero contr/distr then don't update the final NAV date
                    finalNAV            = finalNAV - transamt(j);
                    finalNAV_datenum    = transdatenum(j);
                end
                j                   = j + 1;
            end
        
            % Check that the first transaction is a contribution
            while (transtype(1) ~= 1)
                disp(['Fund #' num2str(inCF_fundid(strti)) ': ' inCF_transtype(1) ' before first Contribution']);
                transdatenum(1)     = [];   % delete any transactions preceding the first contribution
                transtype(1)        = [];
                transamt(1)         = [];
            end

            % Drop any trailing zeros (not a big deal, but helps to speed things up)
            while (transamt(end) == 0)
                transdatenum(end)   = [];
                transtype(end)      = [];
                transamt(end)       = [];
            end
        else
            transdatenum = [];      % not a single NAV reported for this fund, will be skipped
        end
        
        % If there is valid transaction data for this fund, process it
        if ~isempty(transdatenum)    % skip any empty funds 
            
            cnt = cnt + 1;      % increase fund counter
        
            % Basic fund characteristics
            PEfund_fundid(cnt)      = inCF_fundid(strti);
            PEfund_firmid(cnt)      = inCF_firmid(strti);   % missing = NaN
            PEfund_vintage(cnt)     = inCF_vintage(strti);
            PEfund_size(cnt)        = inCF_size(strti) / 1000000;
            PEfund_size1990(cnt)    = size1990;       
            % determine the fund's asset class
            if isVC, PEfund_type(cnt,1) = 1; end
            if isBO, PEfund_type(cnt,1) = 2; end
            if isGE, PEfund_type(cnt,1) = 3; end
            % and subclass (for VC)
            PEfund_subtype(cnt)     = 0;    
            if strcmpi(inCF_subclass(strti), 'Generalist') , PEfund_subtype(cnt,1) = 1; end
            if strcmpi(inCF_subclass(strti), 'Early Stage'), PEfund_subtype(cnt,1) = 2; end
            if strcmpi(inCF_subclass(strti), 'Late Stage') , PEfund_subtype(cnt,1) = 3; end  

            
            % Add the final NAV as a separate distribution (if includeNAV==1)
            % This may result in two cash flows on the same date, but that's ok. 
            % NB: NAVs are ignored in the processing of cash flows below, so there is no double-counting
            PEfund_finalNAV(cnt)    = max(0, finalNAV); % take care of cases where final distributions make NAV negative
            if (includeNAV == 1) && (PEfund_finalNAV(cnt) > 0)
                transdatenum    = [transdatenum; finalNAV_datenum];
                transtype       = [transtype   ; 2];   % Distribution: transtype = 2
                transamt        = [transamt    ; PEfund_finalNAV(cnt)]; 
            end
            
        
            % Construct matrix with cash flows and a matrix with time-since-fund-inception
            distr               = zeros(1,length(transamt));   % separate vector for distributions
            contr               = zeros(1,length(transamt));   % separate vector for contributions
            cntCF               = 0;
            for j = 1:length(transamt)
                % Enter the cash flows into the matrix. Notes:
                % 1) Calls are sometimes positive: 
                %       Can occur if the fund manager calls up capital for a deal, but for whatever reason the deal falls through or a smaller amount of capital is required. The GP refunds some capital to the LPs, which can be called later.
                %       Just treat as a negative call for the purpose of computing TVPI, IRR, and (G)PME.
                % 2) Distributions are sometimes negative: 
                %       Reflects the occurrence of recallable distributions. Fund managers often have the option of recalling a certain proportion of the distributions made to investors for further investments, effectively recycling commitments.
                %       For TVPI, per GIPS standards, recallable distributions should be considered included in the numerator of this ratio. Any reinvested capital (resulting from recallable distributions) should be included in the contributions.
                %       As a result, cumulative calls can exceed fund size
                % NB: These issues only matters for TVPI and traditional PME, where there is a distinction between calls and distributions. 
                %     GPME and IRR only care about net cash flows, and whether a cash flow is considered a positive or negative call or distribution is inconsequential
                if (transtype(j) == 1) || (transtype(j) == 2) % if contribution or distribution (i.e., skip all valuations)
                    cntCF                       = cntCF + 1;
                    PEfund_CF(cnt,cntCF)        = transamt(j);     
                    PEfund_datenums(cnt,cntCF)  = transdatenum(j);
                    
                    % Compute time since fund inception (in years)
                    PEfund_T(cnt,cntCF)         = (transdatenum(j) - transdatenum(1))/365;
                    
                    % separate contributions and distributions (only used for computing TVPI and PME)
                    if (transtype(j) == 1)  % contribution 
                    	contr(cntCF)            = -transamt(j); 
                    else                    % distribution (will include final NAV if includeNAV==1)
                        if transamt(j) > 0  % normal distribution
                            distr(cntCF)        = transamt(j); 
                        else                % negative distribution, should be considered a contribution (this pulls TVPI and PME towards 1)
                            contr(cntCF)        = -transamt(j); 
                        end
                    end
                    
                    % Compute the matching factor returns between fund firstdate and transaction date
                    IIstrt = find(FF_datenum == transdatenum(1)); 
                    if isempty(IIstrt)  % if no exact date match, find closest prior or next date in Fama-French data
                        klag = 1 ; while isempty(find(FF_datenum == transdatenum(1)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF_datenum == transdatenum(1)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIstrt  = find(FF_datenum == transdatenum(1)-klag);     % go before
                        else
                            IIstrt  = find(FF_datenum == transdatenum(1)+klead);    % go after
                        end
                    end
                    IIend  = find(FF_datenum == transdatenum(j));
                    if isempty(IIend)  % if no exact date match, find closest prior or next date in Fama-French data
                        klag = 1 ; while isempty(find(FF_datenum == transdatenum(j)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF_datenum == transdatenum(j)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIend   = find(FF_datenum == transdatenum(j)-klag);     % go before
                        else
                            IIend   = find(FF_datenum == transdatenum(j)+klead);    % go after
                        end
                    end
                    PEfund_Rm(cnt,cntCF)       = prod(1+FF_rm(IIstrt+1:IIend));
                    PEfund_Rm2(cnt,cntCF)      = prod(1+2*FF_rm(IIstrt+1:IIend));  % for 'levered' PME with 'beta'=2
                    PEfund_Rsmb(cnt,cntCF)     = prod(1+FF_rSMB(IIstrt+1:IIend));
                    PEfund_RS2(cnt,cntCF)      = prod(1+FF_rf(IIstrt+1:IIend)+FF_rSMB(IIstrt+1:IIend));
                    PEfund_RH2(cnt,cntCF)      = prod(1+FF_rf(IIstrt+1:IIend)+FF_rHML(IIstrt+1:IIend));
                    PEfund_Rhml(cnt,cntCF)     = prod(1+FF_rHML(IIstrt+1:IIend));
                    PEfund_Rrmw(cnt,cntCF)     = prod(1+FF_rRMW(IIstrt+1:IIend));
                    PEfund_Rcma(cnt,cntCF)     = prod(1+FF_rCMA(IIstrt+1:IIend));
                    PEfund_Rf(cnt,cntCF)       = prod(1+FF_rf(IIstrt+1:IIend));             
                                                                                      
                    PEfund_Rmvol(cnt,cntCF)    = std(FF_rm(IIstrt+1:IIend));

                    if (IIstrt < IIend) 
                        PEfund_Rmvar(cnt,cntCF) = var(FF_rm(IIstrt+1:IIend)) * PEfund_T(cnt,cntCF) * 250;
                    else
                        PEfund_Rmvar(cnt,cntCF) = 0;
                    end                    
                    
                    IIstrt = find(FF6_datenum == transdatenum(1)); 
                    if isempty(IIstrt)  % if no exact date match, find closest prior or next date in Fama-French data
                        klag = 1 ; while isempty(find(FF6_datenum == transdatenum(1)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF6_datenum == transdatenum(1)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIstrt  = find(FF6_datenum == transdatenum(1)-klag);     % go before
                        else
                            IIstrt  = find(FF6_datenum == transdatenum(1)+klead);    % go after
                        end
                    end
                    IIend  = find(FF6_datenum == transdatenum(j));
                    if isempty(IIend)
                        klag = 1 ; while isempty(find(FF6_datenum == transdatenum(j)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF6_datenum == transdatenum(j)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIend   = find(FF6_datenum == transdatenum(j)-klag);     % go before
                        else
                            IIend   = find(FF6_datenum == transdatenum(j)+klead);    % go after
                        end
                    end
                    PEfund_RSL(cnt,cntCF)      = prod(1+FF6_SL(IIstrt+1:IIend));
                    PEfund_RSM(cnt,cntCF)      = prod(1+FF6_SM(IIstrt+1:IIend));
                    PEfund_RSH(cnt,cntCF)      = prod(1+FF6_SH(IIstrt+1:IIend));
                    PEfund_RS(cnt,cntCF)       = prod(1+(FF6_SL(IIstrt+1:IIend)+FF6_SM(IIstrt+1:IIend)+FF6_SH(IIstrt+1:IIend))/3 );
                    PEfund_RB(cnt,cntCF)       = prod(1+(FF6_BL(IIstrt+1:IIend)+FF6_BM(IIstrt+1:IIend)+FF6_BH(IIstrt+1:IIend))/3 );
                    PEfund_RBL(cnt,cntCF)      = prod(1+FF6_BL(IIstrt+1:IIend));
                    PEfund_RBM(cnt,cntCF)      = prod(1+FF6_BM(IIstrt+1:IIend));
                    PEfund_RBH(cnt,cntCF)      = prod(1+FF6_BH(IIstrt+1:IIend));   
                    PEfund_RH(cnt,cntCF)       = prod(1+(FF6_SH(IIstrt+1:IIend)+FF6_BH(IIstrt+1:IIend))/2 );
                    PEfund_RL(cnt,cntCF)       = prod(1+(FF6_SL(IIstrt+1:IIend)+FF6_BL(IIstrt+1:IIend))/2 );
                    
                    IIstrt = find(FF6MEOP_datenum == transdatenum(1)); 
                    if isempty(IIstrt)  % if no exact date match, find closest prior or next date in Fama-French data
                        klag = 1 ; while isempty(find(FF6MEOP_datenum == transdatenum(1)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF6MEOP_datenum == transdatenum(1)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIstrt  = find(FF6MEOP_datenum == transdatenum(1)-klag);     % go before
                        else
                            IIstrt  = find(FF6MEOP_datenum == transdatenum(1)+klead);    % go after
                        end
                    end
                    IIend  = find(FF6MEOP_datenum == transdatenum(j));
                    if isempty(IIend)
                        klag = 1 ; while isempty(find(FF6MEOP_datenum == transdatenum(j)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF6MEOP_datenum == transdatenum(j)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIend   = find(FF6MEOP_datenum == transdatenum(j)-klag);     % go before
                        else
                            IIend   = find(FF6MEOP_datenum == transdatenum(j)+klead);    % go after
                        end
                    end             
                    PEfund_RSLop(cnt,cntCF) = prod(1+FF6_SLop(IIstrt+1:IIend));
                    PEfund_RSMop(cnt,cntCF) = prod(1+FF6_SMop(IIstrt+1:IIend));
                    PEfund_RSHop(cnt,cntCF) = prod(1+FF6_SHop(IIstrt+1:IIend));
                    PEfund_RBLop(cnt,cntCF) = prod(1+FF6_BLop(IIstrt+1:IIend));
                    PEfund_RBMop(cnt,cntCF) = prod(1+FF6_BMop(IIstrt+1:IIend));
                    PEfund_RBHop(cnt,cntCF) = prod(1+FF6_BHop(IIstrt+1:IIend));
                    PEfund_RHop(cnt,cntCF)  = prod(1+(FF6_SHop(IIstrt+1:IIend)+FF6_BHop(IIstrt+1:IIend))/2 );         
                    PEfund_RLop(cnt,cntCF)  = prod(1+(FF6_SLop(IIstrt+1:IIend)+FF6_BLop(IIstrt+1:IIend))/2 );   
                                              
                end % if (transtype = 1 or 2)                         
            end % for j

            % Count # unique net cash flows
            dn = PEfund_datenums(cnt,:);
            PEfund_numCF(cnt)       = length(unique(dn(dn>0)));
            
            % Determine liquidation status
            PEfund_liq100(cnt,1)    = 0;
            PEfund_liq95(cnt,1)     = 0;
            PEfund_liq90(cnt,1)     = 0;   
            if PEfund_vintage(cnt,1) < 2009 % any fund less than 10y old will not be liquidated yet
                if (PEfund_finalNAV(cnt) == 0)
                    PEfund_liq100(cnt,1) = 1;   % fund is fully liquidated
                end
                if ((PEfund_finalNAV(cnt)/1000000) / PEfund_size(cnt) <= .05)	% NB: PEfund_size is in $m
                    PEfund_liq95(cnt,1) = 1; 
                end
                if ((PEfund_finalNAV(cnt)/1000000) / PEfund_size(cnt) <= .1)
                    PEfund_liq90(cnt,1) = 1;
                end
            end
            
            % compute TVPI (don't use CF because a negative distribution is not a capital call)
            PEfund_TVPI(cnt) = sum(distr) / sum(contr);  % NB: Final NAV is included in distr (if includeNAV==1), so don't add it again
                        
            % compute PME (don't use CF because a negative distribution is not a capital call)
            discdistr = distr(1:cntCF)./PEfund_Rm(cnt,1:cntCF);  % NB: Final NAV is included in distr (if includeNAV==1), so don't add it again
            disccontr = contr(1:cntCF)./PEfund_Rm(cnt,1:cntCF);
            PEfund_PME(cnt) = sum(discdistr) / sum(disccontr); 
            
            % compute IRR
            startIRR = [5; 2; 1; .5; .4; .3; .2; .1; 0; -.1; -.2; -.3; -.4; -.5];	% try different starting values;
            curmin = 100000000;
            options = optimset; options = optimset(options, 'display', 'off');
            PEfund_IRR(cnt) = NaN;
            for j = 1:length(startIRR)
                try
                    [IRR_try, fminval, exitflag] = fzero(@(x) sum(PEfund_CF(cnt,:)./((1+x).^min(10,PEfund_T(cnt,:)))), startIRR(j), options);    % the min(10,PEfund_T) is because this is how Preqin calculates IRRs, but is nominally wrong!!    
                catch
                end
                if (abs(fminval) < curmin) && (exitflag > 0)
                    PEfund_IRR(cnt) = IRR_try*100;
                    curmin = fminval;
                end
            end   
            if (sum(contr) > 0) && (sum(distr) == 0)
                PEfund_IRR(cnt) = -100;
            end            
            
        else    % fund is empty: no valid transaction data
            disp(['Fund #' num2str(inCF_fundid(strti)) ' is empty or has no NAV: Not included in sample']);
        end  % if ~isempty(transdatenum)
    end % if (isVC II isBO) && ...    
end % outer while loop


% Delete trailing (unused) zero rows and columns
varlist = who('PEfund_*','R*');
lastrow = length(PEfund_fundid); while (PEfund_fundid(lastrow) == 0), lastrow = lastrow - 1; end
lastcol = size(PEfund_CF,2); while (sum(PEfund_CF(:,lastcol)==0) == maxfunds), lastcol = lastcol - 1; end
for i = 1:length(varlist)
    eval([varlist{i} '(:,lastcol+1:end) = [];']);
    eval([varlist{i} '(lastrow+1:end,:) = [];']);
end


% Construct fund sequence numbers for each GP in the data, and characteristics of the next fund by the same GP
% Assign sequence number within fund type (VC vs buyout) but don't care whether funds qualify or not on other dimensions (e.g. size, location)
% Thus, the final data set does not necessarily contain each number of a GP's sequence!
for i = 1:length(PEfund_fundid)
    % Try to find data for the next fund of the same firm (used for regressions below)            
    II = find(inCF_firmid == PEfund_firmid(i)); % returns empty set if PEfund_firmid(i) = NaN
    if ~isempty(II)
        nextset = [inCF_vintage(II) inCF_fundid(II) II inCF_size(II)/1000000]; 
        nexttype = zeros(length(II),1);
        for j = 1:length(II)
            if strcmpi(inCF_class(II(j)), 'Venture Capital'), nexttype(j) = 1; end
            if strcmpi(inCF_class(II(j)), 'Buyout')         , nexttype(j) = 2; end
            if strcmpi(inCF_class(II(j)), 'Expansion Capital'), nexttype(j) = 3; end
        end
        nextset = nextset(nexttype==PEfund_type(i), :);     % only keep funds of the same type  
        [~,II] = unique(nextset(:,2));                      % only keep one record for each fund
        nextset = nextset(II,:);
        nextset = sortrows(nextset,1);                      % sort by vintageyear   
        nextset = [nextset [1:size(nextset,1)]'];           % add sequence numbers
        % determine the present fund's sequence number
        IIcur = find(nextset(:,2) == PEfund_fundid(i));    % find current fund within nextset
        PEfund_seqnr(i) = nextset(IIcur,end);
        % now determine next fund characteristics (for regressions below)
        IIcur = IIcur + 1; done = false;
        while ~done
            if (IIcur > size(nextset,1))
                done = true;
            else
                if strcmp(inCF_subfocus(nextset(IIcur,3)),'United States')   % skip non-US: could be different team/skill 
                    PEfund_nextfundid(i)    = nextset(IIcur,2);
                    PEfund_nextvintage(i)   = nextset(IIcur,1);
                    PEfund_nextsize(i)      = nextset(IIcur,4);
                    PEfund_nextsize1990(i)  = PEfund_nextsize(i) * CPI(CPI_date==1990) / CPI(CPI_date == PEfund_nextvintage(i)); 
                    done = true;
                end
                IIcur = IIcur + 1;
            end
        end
    end
end


% Save a temporary local data file (NOT TO BE SENT BACK, WILL BE DELETED)
save ('temp', 'PEfund*', 'includeNAV');

toc


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Produce Summary Statistics 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

disp(' ')
disp('Compute summary statistics')
disp(' ')

for liq = 0:3   % repeat tables for each liquidation status
    
    disp('***************************************************************')
    switch liq
        case 0, s = '';        d = 'All funds';             disp(['* ' d]); IIliq = ones(size(PEfund_vintage))>0;
        case 1, s = '_liq100'; d = '100% Liquidated funds'; disp(['* ' d]); IIliq = (PEfund_liq100 == 1);
        case 2, s = '_liq95';  d = '95% Liquidated funds';  disp(['* ' d]); IIliq = (PEfund_liq95 == 1);
        case 3, s = '_liq90';  d = '90% Liquidated funds';  disp(['* ' d]); IIliq = (PEfund_liq90 == 1);
    end
    disp('***************************************************************')
    disp(' ')

    stats_years = (min(PEfund_vintage):max(PEfund_vintage))';
    eval(['stats_numfunds' s '          = ones(length(stats_years)+7,6)*NaN;']);
    eval(['stats_numfirms' s '          = ones(length(stats_years)+7,6)*NaN;']);
    eval(['stats_fundsperfirm' s '      = ones(length(stats_years)+7,9,6)*NaN;']);
    eval(['stats_missingfirms' s '      = ones(length(stats_years)+7,6)*NaN;']);
    eval(['stats_numCFperfund' s '      = ones(length(stats_years)+7,9,6)*NaN;']);
    eval(['stats_fundsize' s '          = ones(length(stats_years)+7,9,6)*NaN;']);
    eval(['stats_effectiveT' s '        = ones(length(stats_years)+7,9,6)*NaN;']);
    eval(['stats_TVPI' s '              = ones(length(stats_years)+7,11,6)*NaN;']);
    eval(['stats_IRR' s '               = ones(length(stats_years)+7,11,6)*NaN;']);
    eval(['stats_PME' s '               = ones(length(stats_years)+7,11,6)*NaN;']);
    eval(['stats_TVPIwtd' s '           = ones(length(stats_years)+7,2,6)*NaN;']);
    eval(['stats_IRRwtd' s '            = ones(length(stats_years)+7,2,6)*NaN;']);
    eval(['stats_PMEwtd' s '            = ones(length(stats_years)+7,2,6)*NaN;']);
    
    % Compute statistics (# funds, # firms, TVPI, etc) by vintage year, and across all years
    for i = 1:length(stats_years)+7

        % Construct matrix with # funds by vintage year (rows) and type (columns: VC_all BO GE)
        % first row is <=1980, second row is <=1985
        % final 5 rows are by decade (<1990, 1990-99, 2000-09, 2010 onwards) and across all years, respectively.
        if i == 1
            II = IIliq & (PEfund_vintage <= 1980);
        elseif i == 2
            II = IIliq & (PEfund_vintage <= 1985);
        elseif i <= length(stats_years)+2
            II = IIliq & (PEfund_vintage == stats_years(i-2));
        elseif i == length(stats_years)+3
            II = IIliq & (PEfund_vintage <= 1989);
        elseif i == length(stats_years)+4
            II = IIliq & (PEfund_vintage >= 1990) & (PEfund_vintage <= 1999);
        elseif i == length(stats_years)+5
            II = IIliq & (PEfund_vintage >= 2000) & (PEfund_vintage <= 2009);               
        elseif i == length(stats_years)+6
            II = IIliq & (PEfund_vintage >= 2010);    
        elseif i == length(stats_years)+7
            II = IIliq & (ones(size(PEfund_vintage))>0);    % across all years
        end
        
        eval(['stats_numfunds' s '(i,:) = [sum(PEfund_type(II)==1) sum(PEfund_type(II)==1 & PEfund_subtype(II)==1) sum(PEfund_type(II)==1 & PEfund_subtype(II)==2) sum(PEfund_type(II)==1 & PEfund_subtype(II)==3) sum(PEfund_type(II)==2) sum(PEfund_type(II)==3)];']);

        % Matrix with # firms (same structure as stats_numfunds)
        eval(['stats_numfirms' s '(i,:) = [sum(~isnan(unique(PEfund_firmid(II & PEfund_type==1)))) sum(~isnan(unique(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==1)))) '...
            'sum(~isnan(unique(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==2)))) sum(~isnan(unique(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==3)))) sum(~isnan(unique(PEfund_firmid(II & PEfund_type==2)))) sum(~isnan(unique(PEfund_firmid(II & PEfund_type==3))))];']); 

        % Summary stats of # funds per firm
        % 3D Matrix with vintage year first, stats second ([mean, stdev, min, p10, p25, median, p75, p90, max]), and type/subtype third ([VC_all VC_balanced VC_early VC_late BO GE])
        % first row is <=1980, second row is <=1985
        % final 5 rows are by decade (<1990, 1990-99, 2000-09, 2010 onwards) and across all years, respectively.
        firmid = unique(PEfund_firmid(II & PEfund_type==1)); firmid(isnan(firmid)) = []; % list of unique firms, not counting missing firmid (= NaN)
        fpf = zeros(length(firmid),1); for j = 1:length(firmid), fpf(j) = sum(PEfund_firmid(II & PEfund_type==1) == firmid(j)); end   % count # funds per firm
        if ~isempty(fpf), eval(['stats_fundsperfirm' s '(i,:,1) = [mean(fpf) std(fpf) min(fpf) prctile(fpf,[10 25 50 75 90]) max(fpf)];']); end
        for k = 1:3
            firmid = unique(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==k)); firmid(isnan(firmid)) = []; % list of unique firms, not counting missing firmid (= NaN)
            fpf = zeros(length(firmid),1); for j = 1:length(firmid), fpf(j) = sum(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==k) == firmid(j)); end   % count # funds per firm
            if ~isempty(fpf), eval(['stats_fundsperfirm' s '(i,:,k+1) = [mean(fpf) std(fpf) min(fpf) prctile(fpf,[10 25 50 75 90]) max(fpf)];']); end
        end    
        firmid = unique(PEfund_firmid(II & PEfund_type==2)); firmid(isnan(firmid)) = []; % list of unique firms, not counting missing firmid (= NaN)
        fpf = zeros(length(firmid),1); for j = 1:length(firmid), fpf(j) = sum(PEfund_firmid(II & PEfund_type==2) == firmid(j)); end   % count # funds per firm
        if ~isempty(fpf), eval(['stats_fundsperfirm' s '(i,:,5) = [mean(fpf) std(fpf) min(fpf) prctile(fpf,[10 25 50 75 90]) max(fpf)];']); end
        firmid = unique(PEfund_firmid(II & PEfund_type==3)); firmid(isnan(firmid)) = []; % list of unique firms, not counting missing firmid (= NaN)
        fpf = zeros(length(firmid),1); for j = 1:length(firmid), fpf(j) = sum(PEfund_firmid(II & PEfund_type==3) == firmid(j)); end   % count # funds per firm
        if ~isempty(fpf), eval(['stats_fundsperfirm' s '(i,:,6) = [mean(fpf) std(fpf) min(fpf) prctile(fpf,[10 25 50 75 90]) max(fpf)];']); end
        % Count # funds with missing GP identifiers
        eval(['stats_missingfirms' s '(i,:) = [sum(isnan(PEfund_firmid(II & PEfund_type==1))) sum(isnan(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==1))) '...
            'sum(isnan(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==2))) sum(isnan(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==3))) sum(isnan(PEfund_firmid(II & PEfund_type==2))) sum(isnan(PEfund_firmid(II & PEfund_type==3)))];']); 
        
        % # Cash flows per fund (same structure as stats_fundsperfirm)
        x = PEfund_numCF(II & PEfund_type==1);    if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,1) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,2) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,3) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,4) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==2);    if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,5) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==3);    if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,6) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = PEfund_size(II & PEfund_type==1);    if ~isempty(x), eval(['stats_fundsize' s '(i,:,1) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_fundsize' s '(i,:,2) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_fundsize' s '(i,:,3) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_fundsize' s '(i,:,4) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==2);    if ~isempty(x), eval(['stats_fundsize' s '(i,:,5) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==3);    if ~isempty(x), eval(['stats_fundsize' s '(i,:,6) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = max(PEfund_T(II & PEfund_type==1,:),[],2);    if ~isempty(x), eval(['stats_effectiveT' s '(i,:,1) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==1 & PEfund_subtype==1,:),[],2); if ~isempty(x), eval(['stats_effectiveT' s '(i,:,2) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==1 & PEfund_subtype==2,:),[],2); if ~isempty(x), eval(['stats_effectiveT' s '(i,:,3) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==1 & PEfund_subtype==3,:),[],2); if ~isempty(x), eval(['stats_effectiveT' s '(i,:,4) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==2,:),[],2);    if ~isempty(x), eval(['stats_effectiveT' s '(i,:,5) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==3,:),[],2);    if ~isempty(x), eval(['stats_effectiveT' s '(i,:,6) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = PEfund_TVPI(II & PEfund_type==1);    if ~isempty(x), eval(['stats_TVPI' s '(i,:,1) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_TVPI' s '(i,:,2) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_TVPI' s '(i,:,3) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_TVPI' s '(i,:,4) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==2);    if ~isempty(x), eval(['stats_TVPI' s '(i,:,5) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==3);    if ~isempty(x), eval(['stats_TVPI' s '(i,:,6) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = PEfund_TVPI(II & PEfund_type==1);    w = PEfund_size(II & PEfund_type==1);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,1) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==1); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,2) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==2); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,3) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==3); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,4) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==2);    w = PEfund_size(II & PEfund_type==2);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,5) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==3);    w = PEfund_size(II & PEfund_type==3);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,6) = [m st];']); end
        
        x = PEfund_IRR(II & PEfund_type==1);    if ~isempty(x), eval(['stats_IRR' s '(i,:,1) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_IRR' s '(i,:,2) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_IRR' s '(i,:,3) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_IRR' s '(i,:,4) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==2);    if ~isempty(x), eval(['stats_IRR' s '(i,:,5) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==3);    if ~isempty(x), eval(['stats_IRR' s '(i,:,6) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = PEfund_IRR(II & PEfund_type==1 & ~isnan(PEfund_IRR));    w = PEfund_size(II & PEfund_type==1 & ~isnan(PEfund_IRR));    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,1) = [m st];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==1 & ~isnan(PEfund_IRR)); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==1 & ~isnan(PEfund_IRR)); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,2) = [m st];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==2 & ~isnan(PEfund_IRR)); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==2 & ~isnan(PEfund_IRR)); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,3) = [m st];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==3 & ~isnan(PEfund_IRR)); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==3 & ~isnan(PEfund_IRR)); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,4) = [m st];']); end
        x = PEfund_IRR(II & PEfund_type==2 & ~isnan(PEfund_IRR));    w = PEfund_size(II & PEfund_type==2 & ~isnan(PEfund_IRR));    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,5) = [m st];']); end      
        x = PEfund_IRR(II & PEfund_type==3 & ~isnan(PEfund_IRR));    w = PEfund_size(II & PEfund_type==3 & ~isnan(PEfund_IRR));    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,6) = [m st];']); end      
        
        x = PEfund_PME(II & PEfund_type==1);    if ~isempty(x), eval(['stats_PME' s '(i,:,1) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_PME' s '(i,:,2) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_PME' s '(i,:,3) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_PME' s '(i,:,4) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==2);    if ~isempty(x), eval(['stats_PME' s '(i,:,5) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==3);    if ~isempty(x), eval(['stats_PME' s '(i,:,6) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        
        x = PEfund_PME(II & PEfund_type==1);    w = PEfund_size(II & PEfund_type==1);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,1) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==1); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,2) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==2); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,3) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==3); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,4) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==2);    w = PEfund_size(II & PEfund_type==2);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,5) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==3);    w = PEfund_size(II & PEfund_type==3);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,6) = [m st];']); end
        
    end % loop over years
end % for liq
    

% save summary stats in output folder
save([outdir 'Burgiss_sumstats_' datestr(date,'yyyymmdd')], 'stats_*');




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Estimate fund-level (G)PME and alphas 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

tic

disp(' ')
disp('Estimate performance metrics')
disp(' ')

if debug
    sampset = 1;    % only have VC funds in fake data, so just run VC estimates
else
    sampset = [1:5 7:11 25 29 31 35 37 41 43 47];
end

for samp = sampset
    % samp refers to different assumptions about asset classes, liquidation returns, subperiods, etc.
    % It can take on the following values:
    %   1 -6 : all funds of VC(all), VC(balanced), VC(early-stage), VC(late-stage), Buyout, Growth Equity, respectively.
    %   7 -12: 100% liquidated funds only
    %   13-18: 95% liquidated funds only
    %   19-24: 90% liquidated funds only
    %   25-30: fund size < median for the vintage
    %   31-36: fund size >= median for the vintage
    %   37-42: pre-199x funds only
    %   43-48: post-199x funds only

    
    disp(' '); disp(' '); disp(' ')
    disp('***************************************************************')
    disp(['Processing sample #' num2str(samp)])
    disp('***************************************************************')
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Load data and implement a few selection criteria
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    % start with fresh input dataset
    clearvars PEfund_* stats_* nextperf_*
    load 'temp'
           
    % Select funds based on fund type
    switch samp 
        case {1, 7, 13, 19, 25, 31, 37, 43}
            II = (PEfund_type == 1); s_asset = 'VC';
            
        case {2, 8, 14, 20, 26, 32, 38, 44}
            II = (PEfund_type == 1) & (PEfund_subtype == 1); s_asset = 'VCbal';
        
        case {3, 9, 15, 21, 27, 33, 39, 45}
            II = (PEfund_type == 1) & (PEfund_subtype == 2); s_asset = 'VCearly';    
                
        case {4, 10, 16, 22, 28, 34, 40, 46}
            II = (PEfund_type == 1) & (PEfund_subtype == 3); s_asset = 'VClate';
            
        case {5, 11, 17, 23, 29, 35, 41, 47}
            II = (PEfund_type == 2); s_asset = 'Buyout';
            
        case {6, 12, 18, 24, 30, 36, 42, 48}
            II = (PEfund_type == 3); s_asset = 'GrowthEq';
            
    end
    varlist = whos('PEfund_*'); for i = 1:length(varlist), eval([varlist(i).name ' = ' varlist(i).name '(II>0,:);']); end   
    
    % Select funds based on liquidation status
    switch samp
        case {7, 8, 9, 10, 11, 12}  
            II = (PEfund_liq100 == 1); s_asset = [s_asset '_liq100'];
            
        case {13, 14, 15, 16, 17, 18}    
            II = (PEfund_liq95 == 1); s_asset = [s_asset '_liq95'];
            
        case {19, 20, 21, 22, 23, 24}
            II = (PEfund_liq90 == 1); s_asset = [s_asset '_liq90'];
            
        otherwise
            II = ones(size(PEfund_vintage))>0;  % make sure nothing is dropped
    end
    varlist = whos('PEfund_*'); for i = 1:length(varlist), eval([varlist(i).name ' = ' varlist(i).name '(II>0,:);']); end   
    
    % Select funds based on time period
    switch samp
        case {37, 38, 39, 40, 41, 42}
            II = (PEfund_vintage < 2000); s_asset = [s_asset '_pre00'];
            
        case {43, 44, 45, 46, 47, 48}
            II = (PEfund_vintage >= 2000); s_asset = [s_asset '_post00'];
            
        otherwise
            II = ones(size(PEfund_vintage))>0;  % make sure nothing is dropped
    end
    varlist = whos('PEfund_*'); for i = 1:length(varlist), eval([varlist(i).name ' = ' varlist(i).name '(II>0,:);']); end   
    
    % Select funds based on size (<> median by vintage year)
%     cutoff = median(PEfund_size);
    yrs = min(PEfund_vintage):max(PEfund_vintage);
    cutoff = zeros(length(yrs),1);
    for i = 1:length(yrs)
        cutoff(i,1) = nanmedian(PEfund_size(PEfund_vintage==yrs(i)));
    end
    switch samp
        case {25, 26, 27, 28, 29, 30}
%             II = (PEfund_size < cutoff);
            II = (ones(length(PEfund_size),1)==0);   % make sure it's a logical variable
            for i = 1:length(PEfund_size)
                II(i) = PEfund_size(i) < cutoff(PEfund_vintage(i)-min(yrs)+1);
            end            
            s_asset = [s_asset '_small'];
            
        case {31, 32, 33, 34, 35, 36}
%             II = (PEfund_size >= cutoff);
            II = (ones(length(PEfund_size),1)==0);   % make sure it's a logical variable
            for i = 1:length(PEfund_size)
                II(i) = PEfund_size(i) >= cutoff(PEfund_vintage(i)-min(yrs)+1);
            end   
            s_asset = [s_asset '_large'];
            
        otherwise
            II = ones(size(PEfund_vintage))>0;  % make sure nothing is dropped
    end   
    varlist = whos('PEfund_*'); for i = 1:length(varlist), eval([varlist(i).name ' = ' varlist(i).name '(II>0,:);']); end   

     
    % construct numcf, an NxJ matrix that has the number of cash flows for each fund in every row (counting duplicate dates, so different from PEfund_numCF!).
    numcf = repmat(sum(PEfund_Rm > 0, 2), 1, size(PEfund_Rf,2)); 

    % choose which portfolios to use
    Rf = PEfund_Rf;
    Rm = PEfund_Rm;
    Rm2 = PEfund_Rm2;

    % choose Pastor-Stambaugh (traded) liquidity factor 
    % Rl = PEfund_RPS10; 

    % Compute max times for use in GMM criterion function
    firstdates = PEfund_datenums(:,1);
    lastdates = max(PEfund_datenums,[],2);
    hi = [max(PEfund_T')' PEfund_datenums(:,1) max(PEfund_datenums,[],2)]; 

    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Generate replicating funds for estimation of SDF parameters
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    %normalize to $1 discounted capital calls
    q = ones(1,size(PEfund_CF,2)); 
    ind = (PEfund_CF>=0); 
    negcf = PEfund_CF./Rf; 
    negcf(ind) = 0; 
    cfnorm = -sum(negcf,2)*q; 
    cf  = PEfund_CF./cfnorm;
            

%     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%     % Sanity check: log-utility CAPM should precisely price xRm and Rs to zero
%     %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
%     r = zeros(size(Rm,1),2,size(Rm,2));  
%     r(:,1,:) = xRf;    
%     r(:,2,:) = xRm;     
%     f = zeros(size(Rm,1),2,size(Rm,2));
%     f(:,1,:) = PEfund_T;
%     f(:,2,:) = log(Rm);
%     prc = zeros(size(r,1),size(r,2));
%     aJ = [0; 0; 1];
%     gx = GMMcritg(@fundMCexpaff,[0; -1], f, r, prc, 1, hi, @Smatdatenums, aJ)


    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
    % Log-utility CAPM (i.e., "traditional" PME)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
    % Include mkt return and Rf here, too -- are irrelevant, but otherwise
    % code for fundMCexpaff function does not work due to two singleton 
    % dimensions Matlab does not know which one to squeeze in this case 
    r = zeros(size(Rm,1), 3, size(Rm,2)); 
    r(:,3,:) = cf;
    r(:,1,:) = ReplicateCF(cf, Rf, PEfund_T);    
    r(:,2,:) = ReplicateCF(cf, Rm, PEfund_T);       
    f = zeros(size(Rm,1), 2, size(Rm,2));
    f(:,1,:) = PEfund_T;
    f(:,2,:) = log(Rm);     
    prc = [ones(size(r,1), size(r,2)-1) zeros(size(r,1), 1)]; % expected sum of discounted fund cash flows should equal zero, while expected sum of discounted market returns should equal the # of returns (same for rf)
    W = eye(3);
    W(3,3) = 0.00001; 
    aJ = [0; 0; 1];
    
    [~, Jfix, ~, bvar, ~, pJopt, pJfix, gt] = GMMcrit(@fundMCexpaff, [0; -1], f, r, prc, 1, hi, @Smatdatenums, aJ); 
    J_logucapm = Jfix  
    pJ_logucapm = pJfix  
    PMEi = gt(:,3);
    PME = mean(gt(:,3)) 
    se_logucapm = sqrt(PME^2/Jfix)
    

    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
    % "levered" PME, using beta=2
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
    % Include mkt return and Rf here, too -- are irrelevant, but otherwise
    % code for fundMCexpaff function does not work due to two singleton 
    % dimensions Matlab does not know which one to squeeze in this case 
    r = zeros(size(Rm2,1), 3, size(Rm2,2)); 
    r(:,3,:) = cf;
    r(:,1,:) = ReplicateCF(cf, Rf, PEfund_T);    
    r(:,2,:) = ReplicateCF(cf, Rm2, PEfund_T);       
    f = zeros(size(Rm2,1), 2, size(Rm2,2));
    f(:,1,:) = PEfund_T;
    f(:,2,:) = log(Rm2);     % discount by twice the market return (sort of like a beta=2 assumption)
    prc = [ones(size(r,1), size(r,2)-1) zeros(size(r,1), 1)]; % expected sum of discounted fund cash flows should equal zero, while expected sum of discounted market returns should equal the # of returns (same for rf)
    W = eye(3);
    W(3,3) = 0.00001; 
    aJ = [0; 0; 1];
    
    [~, Jfix, ~, bvar, ~, pJopt, pJfix, gt] = GMMcrit(@fundMCexpaff, [0; -1], f, r, prc, 1, hi, @Smatdatenums, aJ); 
    J_lev = Jfix  
    pJ_lev = pJfix  
    PMEi_lev = gt(:,3);
    PME_lev = mean(gt(:,3)) 
    se_lev = sqrt(PME^2/Jfix)    
    
    
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
    % Estimate GPME models with GMM
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Choose set of SDF factors (each row is a model)
    factor_set  = {{'log(Rm)'}; ...
                   {'log(Rm)', 'log(PEfund_RSL)-log(Rm)'}; ...
                   {'log(Rm)', 'log(PEfund_RSH)-log(Rm)'}; ...
                   {'log(Rm)', 'log(PEfund_RBL)-log(Rm)'}; ...
                   {'log(Rm)', 'log(PEfund_RBH)-log(Rm)'}};

    % Choose set of assets to fit SDF to (each row is a model)
    fit_set     = {{'Rf','Rm'}; ...
                   {'Rf','Rm','PEfund_RSL'}; ...
                   {'Rf','Rm','PEfund_RSH'}; ...                   
                   {'Rf','Rm','PEfund_RBL'}; ...
                   {'Rf','Rm','PEfund_RBH'}};

    % Choose riskfree rate (needed for estimating betas; each row is a model)
    logRf_set = {'log(Rf)'; ...
                 'log(Rf)'; ...                 
                 'log(Rf)'; ...                 
                 'log(Rf)'; ...                 
                 'log(Rf)'};

    label_set = {{'CAPM', 'capm'}; ...
                 {'Market + small-growth', 'SL'}; ...
                 {'Market + small-value', 'SH'}; ...
                 {'Market + big-growth', 'BL'}; ...
                 {'Market + big-value', 'BH'}};
            
    options = optimset('TolFun',1.0e-18,'MaxIter',2500,'TolX',1.0e-18, ...
                       'MaxFunEvals',100000, 'Display', 'off');
    
    alphai_save = NaN(size(cf,1),length(fit_set));
    GPMEi_save = NaN(size(cf,1),length(fit_set));
                   
    for fm = 1:length(fit_set)   % cycle through models
        factors = factor_set{fm};
        fit = fit_set{fm};
        labels = label_set{fm};

        disp('*****************************************************************************')
        disp(['GPME Model ' num2str(fm) ' (' labels{1} ')'])
        fs = factors{1}; for j = 2:length(factors), fs = [fs ', ' factors{j}]; end    
        disp(['factors  : ' fs])
        fs = fit{1}; for j = 2:length(fit), fs = [fs ', ' fit{j}]; end    
        disp(['fitted to: ' fs])
        disp('*****************************************************************************')

        % Generate replicating funds for estimation of SDF parameters (and include VC cash flows as final asset)
        numr = length(fit)+1;
        r = zeros(size(cf,1),numr,size(cf,2)); 
        for j = 1:length(fit)
            eval(['r(:,j,:) = ReplicateCF(cf, ' fit{j} ', PEfund_T);']);
        end
        r(:,numr,:) = cf; 

        % vector of prices
        prc = zeros(size(r,1),size(r,2));
        % weights
        W = eye(numr);
        W(numr,numr) = 0.00000001;   %SDF parameters chosen to fit Rm and Rf perfectly, put (virtually) no weight on VC

        % set up the pricing factors
        numf = length(factors);
        f = zeros(size(cf,1),numf+1,size(cf,2)); 
        f(:,1,:) = PEfund_T; 
        for j = 1:numf
            eval(['f(:,j+1,:) = ' factors{j} ';'])
        end

        % estimate SDF
        initb = [0.9; -ones(numf,1)*2];%initb = zeros(numf+1,1);%[0; -2; -2]; %0.03; -1.6; -0.5
        aJ = [zeros(numr-1,1); 1];
        if (numf+1 > numr-1)
            fprintf('\nERROR: Too few assets, cannot estimate SDF\n')
            break
        end
        if (numf==1)%(numf+1 == numr-1)
            b = fsolve(@(b) GMMcritg(@fundMCexpaff,b,f,r(:,1:numr-1,:),prc(:,1:numr-1),eye(numr-1)), initb, options)  %fundMCexpaff takes log factors as inputs
        else%if (numf+1 < numr-1)
            b = fminsearch(@(b) sum(GMMcritg(@fundMCexpaff,b,f,r(:,1:numr-1,:),prc(:,1:numr-1),eye(numr-1)).^2), initb, options)
        end        

        % estimate GPME
        [~, Jfix, ~, bvar, ~, ~, pJfix, gt, gvar] = GMMcrit(@fundMCexpaff, b, f, r, prc, W, hi, @Smatdatenums, aJ); 
    
        bse     = sqrt(diag(bvar))
        J       = Jfix  
        pJ      = pJfix  
        GPMEi   = gt(:,numr);
        GPME    = mean(gt(:,numr))
        se      = sqrt(gvar)

        % Back out betas
        f = zeros(size(cf,1),numf+2,size(cf,2));
        f(:,1,:) = PEfund_T;
        eval(['f(:,2,:) = ' logRf_set{fm} ';'])
        for j = 1:numf
            eval(['f(:,j+2,:) = ' factors{j} ';'])
        end    
         
        % estimate factor covariance matrix, taking into account longer
        % horizons as well (possibly some mean reversion)
        vart = zeros(length(factors), length(factors), 10);
        for i = 1:15
            ind = (PEfund_T > i-1 & PEfund_T <= i); 
            S = []; for j = 1:length(factors), eval(['fj = ' factors{j} ';']); S(:,j) = fj(ind)./sqrt(PEfund_T(ind)); end
            vart(:,:,i) = cov(S);    % more accurate to scale each obs by its PEfund_T?
        end
        
        if numf == 1 % one factor only
            initb = 0.1;  % reasonable beta
            [betalo, fvallo] = fsolve(@(b) GMMcritg_hvar(@fundMCexpaff_hvar, b, vart, f, r(:,numr,:), GPME, 1), initb, options)  %fundMCexpaff takes log factors as inputs
    
            b = betalo; 
            [gtlo, ~] = fundMCexpaff_hvar(b, vart, f, r(:,numr,:), GPME, 0, 1); 
            varlo = var(gtlo)    % compute variance of alpha for this beta estimate
                
            initb = 30;   % crazy beta
            [betahi, fvalhi] = fsolve(@(b) GMMcritg_hvar(@fundMCexpaff_hvar, b, vart, f, r(:,numr,:), GPME, 1), initb, options)  %fundMCexpaff takes log factors as inputs
    
            b = betahi; 
            [gthi, ~] = fundMCexpaff_hvar(b, vart, f, r(:,numr,:), GPME, 0, 1); 
            varhi = var(gthi)    % compute variance of alpha for this beta estimate

            % pick beta estimate at which variance of alpha is lowest
            if varlo < varhi    
                beta_est = betalo
                g_est = gtlo;
            else
                beta_est = betahi
                g_est = gthi;
            end

            minv = var(g_est) % report as a model-selection metric   
        
            % Apply only to r(:,numr,:) since benchmark portfolio requires 
            % asset-specific betas to price, and hence does not price rf, Rm with the PE beta
            aJ = 1; 
            [~, Jfix, ~, bvar, ~, ~, pJfix, gt] = GMMcrit_hvar(@fundMCexpaff_hvar, beta_est, vart, f, r(:,numr,:), prc(:,numr), 1, hi, @Smatdatenums, aJ); 
             
        elseif numf == 2    % more than 1 factor (NB: currently can only do no more than two factors)
        
            nbeta1 = 300; 
        
            beta1s = linspace(-5,5,nbeta1); % grid
            beta2s = zeros(nbeta1,1);         
            vars = zeros(nbeta1,1); 
            ms = zeros(nbeta1,1); 
            gs = zeros(nbeta1,size(r,1));             
            
            for j = 1:nbeta1 
            
                initb = -10;  % low starting point for beta2   
            
                [betalo, fvallo] = fsolve(@(b) GMMcritg_hvar(@fundMCexpaff_hvar, [beta1s(j); b], vart, f, r(:,numr,:), GPME, 1), initb, options);  %fundMCexpaff takes log factors as inputs
           
                b = betalo; 
                [gtlo, dg] = fundMCexpaff_hvar([beta1s(j); b], vart, f, r(:,numr,:), GPME, 0, 1); 
                varlo = var(gtlo);    % compute variance of alpha for this beta estimate
                mlo = mean(gtlo); 
                varlo = varlo + 100000*mlo^2;
                
                initb = 10;  % high starting point for beta2
                [betahi, fvallo] = fsolve(@(b) GMMcritg_hvar(@fundMCexpaff_hvar, [beta1s(j); b], vart, f, r(:,numr,:), GPME, 1), initb, options);  %fundMCexpaff takes log factors as inputs
           
                b = betahi; 
                [gthi, dg] = fundMCexpaff_hvar([beta1s(j); b], vart, f, r(:,numr,:), GPME, 0, 1); 
                varhi = var(gthi);    % compute variance of alpha for this beta estimate
                mhi = mean(gthi); 
                varhi = varhi + 100000*mhi^2; 
            
                % pick beta estimate at which variance of alpha is lowest
                if varlo < varhi    
                    beta2s(j) = betalo;
                    vars(j) = varlo; 
                    ms(j) = mlo; 
                    gs(j,:) = gtlo'; 
                else
                    beta2s(j) = betahi;
                    vars(j) = varhi; 
                    ms(j) = mhi; 
                    gs(j,:) = gthi'; 
                end
                
            end
            
            [minv,minind] = min(vars); disp(minv)
            beta_est = [beta1s(minind); beta2s(minind)]
            mg = mean(gs,2); 
            mg_est = mg(minind)
        
            if mg_est > 0.001
                fprintf('\nERROR: Solution not found')
                beta_est = [NaN; NaN];
                gt = NaN(size(alphai_save,1),1);
            else
                aJ = 1; 
                [~, Jfix, ~, bvar, ~, ~, pJfix, gt] = GMMcrit_hvar(@fundMCexpaff_hvar, beta_est, vart, f, r(:,numr,:), prc(:,numr), 1, hi, @Smatdatenums, aJ);     
            end
                                                                        
        else
            disp('WARNING: Can only two factors or fewer'); 
        end % if numf == 1
            
        alphai_save(:,fm) = gt;
        GPMEi_save(:,fm)  = GPMEi;           
    end %for fm               
                       
    

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Output results
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    disp(['Sample #' num2str(samp)])
    disp(['# Funds = ' num2str(size(PEfund_CF,1))]);
    
    disp('Correlation across factor models, GPME:');
    corrcoef(GPMEi_save)
    
    disp('Correlation across factor models, alpha:');
    corrcoef(alphai_save)
    
    for fm = 1:size(fit_set,1) % cycle through the various factor models used to estimate GPME and alpha
    
        labels = label_set{fm};
        disp('---------------------------------------------');
        disp(labels{1});
        disp('---------------------------------------------');
        s_fm = labels{2};

        GPMEi = GPMEi_save(:,fm); 
        alphai = alphai_save(:,fm);
        
        f = '%10.4f';
        disp(' ');
        disp('          alpha     GPME      PME     levered PME')
        disp(['Mean     ' num2str(mean(alphai),f) '   ' num2str(mean(GPMEi),f) '   ' num2str(mean(PMEi),f) '   ' num2str(mean(PMEi_lev),f)]);
        disp(' ')
        disp(['Min      ' num2str(min(alphai),f) '   ' num2str(min(GPMEi),f) '   ' num2str(min(PMEi),f) '   ' num2str(min(PMEi_lev),f)]);
        disp(['10th     ' num2str(prctile(alphai,10),f) '   ' num2str(prctile(GPMEi,10),f) '   ' num2str(prctile(PMEi,10),f) '   ' num2str(prctile(PMEi_lev,10),f)]);
        disp(['25th     ' num2str(prctile(alphai,25),f) '   ' num2str(prctile(GPMEi,25),f) '   ' num2str(prctile(PMEi,25),f) '   ' num2str(prctile(PMEi_lev,25),f)]);    
        disp(['Median   ' num2str(median(alphai),f) '   ' num2str(median(GPMEi),f) '   ' num2str(median(PMEi),f) '   ' num2str(median(PMEi_lev),f)]);
        disp(['75th     ' num2str(prctile(alphai,75),f) '   ' num2str(prctile(GPMEi,75),f) '   ' num2str(prctile(PMEi,75),f) '   ' num2str(prctile(PMEi_lev,75),f)]);
        disp(['90th     ' num2str(prctile(alphai,90),f) '   ' num2str(prctile(GPMEi,90),f) '   ' num2str(prctile(PMEi,90),f) '   ' num2str(prctile(PMEi_lev,90),f)]);
        disp(['Max      ' num2str(max(alphai),f) '   ' num2str(max(GPMEi),f) '   ' num2str(max(PMEi),f) '   ' num2str(max(PMEi_lev),f)]);
        disp(' ')
        disp(['St.Dev.  ' num2str(std(alphai),f) '   ' num2str(std(GPMEi),f) '   ' num2str(std(PMEi),f) '   ' num2str(std(PMEi_lev),f)]);
        disp(['Skewness ' num2str(skewness(alphai),f) '   ' num2str(skewness(GPMEi),f) '   ' num2str(skewness(PMEi),f) '   ' num2str(skewness(PMEi_lev),f)]);
        disp(['Kurtosis ' num2str(kurtosis(alphai),f) '   ' num2str(kurtosis(GPMEi),f) '   ' num2str(kurtosis(PMEi),f) '   ' num2str(kurtosis(PMEi_lev),f)]);  

        disp(' ')
        correl = corrcoef(alphai, GPMEi);
        disp(['Corr(GPME,alpha) = ' num2str(correl(1,2))]);
        correl = corrcoef(alphai, PMEi);
        disp(['Corr(PME,alpha) = ' num2str(correl(1,2))]);
        correl = corrcoef(PMEi, GPMEi);
        disp(['Corr(GPME,PME) = ' num2str(correl(1,2))]);
        correl = corrcoef(PMEi_lev, GPMEi);
        disp(['Corr(GPME,levered PME) = ' num2str(correl(1,2))]);
        correl = corrcoef(alphai, PMEi_lev);
        disp(['Corr(levered PME,alpha) = ' num2str(correl(1,2))]);
        
        disp(' ')
        disp(['RMSE alpha vs GPME   = ' num2str(sqrt(mean((GPMEi-alphai).^2)),f)]);
        disp(['Mean absolute deviation alpha vs GPME   = ' num2str(mean(abs(GPMEi-alphai)),f)]);
        disp(['Median absolute deviation alpha vs GPME = ' num2str(median(abs(GPMEi-alphai)),f)]);
        disp(' ')
        disp(['RMSE alpha vs PME   = ' num2str(sqrt(mean((PMEi-alphai).^2)),f)]);
        disp(['Mean absolute deviation alpha vs PME   = ' num2str(mean(abs(PMEi-alphai)),f)]);
        disp(['Median absolute deviation alpha vs PME = ' num2str(median(abs(PMEi-alphai)),f)]);
        disp(' ')
        disp(['RMSE PME vs GPME   = ' num2str(sqrt(mean((GPMEi-PMEi).^2)),f)]);
        disp(['Mean absolute deviation PME vs GPME   = ' num2str(mean(abs(GPMEi-PMEi)),f)]);
        disp(['Median absolute deviation PME vs GPME = ' num2str(median(abs(GPMEi-PMEi)),f)]);
    

        % Compute GPMEs and alphas by vintage year, and across all years and factor models
        %if (samp <= 12)
        stats_years = (min(PEfund_vintage):max(PEfund_vintage))';
        stats_numfunds  = ones(length(stats_years)+6,1)*NaN;
        stats_numfirms  = ones(length(stats_years)+6,1)*NaN;
        stats_PME       = ones(length(stats_years)+6,11)*NaN;
        stats_GPME      = ones(length(stats_years)+6,11)*NaN;
        stats_alpha     = ones(length(stats_years)+6,11)*NaN;
        stats_PMEwtd	= ones(length(stats_years)+6,2)*NaN;
        stats_GPMEwtd	= ones(length(stats_years)+6,2)*NaN;
        stats_alphawtd	= ones(length(stats_years)+6,2)*NaN;
        for i = 1:length(stats_years)+6
            % first row is <=1980, second row is <=1985
            % final 4 rows are by decade (<1990, 1990-99, 2000-10) and across all years, respectively.
            if i == 1
                II = (PEfund_vintage <= 1980);
            elseif i == 2
                II = (PEfund_vintage <= 1985);
            elseif i <= length(stats_years)+2
                II = (PEfund_vintage == stats_years(i-2));
            elseif i == length(stats_years)+3
                II = (PEfund_vintage <= 1989);
            elseif i == length(stats_years)+4
                II = (PEfund_vintage >= 1990) & (PEfund_vintage <= 1999);
            elseif i == length(stats_years)+5
                II = (PEfund_vintage >= 2000);
            elseif i == length(stats_years)+6
                II = (ones(size(PEfund_vintage))>0);    % across all years
            end

            % Construct vector with # funds by vintage year
            stats_numfunds(i) = sum(II>0);

            % vector with # firms (same structure as stats_numfunds)
            % NB: Only funds up to 2012Q2 have a firm id assigned! 
            stats_numfirms(i) = sum(~isnan(unique(PEfund_firmid(II))));

            % Performance metrics
            x = PMEi(II);
            if ~isempty(x)
                stats_PME(i,:) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)]; 
                w = PEfund_size(II); m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); stats_PMEwtd(i,1:2) = [m st]; 
            end
 
            x = GPMEi(II); 
            if ~isempty(x)
                stats_GPME(i,:) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)]; 
                w = PEfund_size(II); m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); stats_GPMEwtd(i,1:2) = [m st]; 
            end
 
            x = alphai(II); 
            if ~isempty(x)
                stats_alpha(i,:) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)]; 
                w = PEfund_size(II); m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); stats_alphawtd(i,1:2) = [m st]; 
            end        
        end % loop over years

        % save alpha and GPME stats in output folder
        save([outdir 'Burgiss_vintageperf_' s_asset '_' s_fm '_' datestr(date,'yyyymmdd')], 'stats_*');
    

        
        
            
        % Make figures
        if (samp==1) || (samp == 5) || (samp==6)
        
        % non-truncated histograms
        h = figure('pos',[10 10 400 250]);
        hold on;
        xvalmin = floor( min(min(alphai),min(GPMEi)) ); xvalmin = floor( min(xvalmin,min(PMEi)) );
        xvalmax = ceil( max(max(alphai),max(GPMEi)) ); xvalmax = ceil( max(xvalmax,max(PMEi)) );
        Nbins = histc(alphai,xvalmin:0.5:xvalmax);
        hb = bar(xvalmin:0.5:xvalmax,Nbins/sum(Nbins),'histc');
        set(hb,'FaceColor',[.8 .8 .8],'EdgeColor','black');
        set(gca,'FontSize',9);
        set(gca,'XTick',floor(xvalmin/2)*2:2:floor(xvalmax/2)*2); % even numbers
        %set(gca,'XMinorTick','On');
        set(gca,'XGrid','on')   % for easy comparison across graphs
        v = axis; axis([xvalmin xvalmax v(3) v(4)])
        ylabel('density'); xlabel('\alpha')
        box on;
        set(gca,'Layer','top') % display gridlines on top of graph    
        set(h,'PaperPositionMode','auto')
        savefig(h, [outdir 'hist_alpha_' s_asset '_' s_fm]);

        h = figure('pos',[10 10 400 250]);
        hold on;
        Nbins = histc(GPMEi,xvalmin:0.5:xvalmax);
        hb = bar(xvalmin:0.5:xvalmax,Nbins/sum(Nbins),'histc');
        set(hb,'FaceColor',[.8 .8 .8],'EdgeColor','black');
        set(gca,'FontSize',9);
        set(gca,'XTick',floor(xvalmin/2)*2:2:floor(xvalmax/2)*2); % even numbers
        %set(gca,'XMinorTick','On');
        set(gca,'XGrid','on')   % for easy comparison across graphs
        v = axis; axis([xvalmin xvalmax v(3) v(4)])
        ylabel('density'); xlabel('GPME')
        box on;
        set(gca,'Layer','top') % display gridlines on top of graph    
        savefig(gcf, [outdir 'hist_GPME_' s_asset '_' s_fm]);    

        if (fm == 1) % only need to output PME for one factor model (as it does not vary with factor models)
        h = figure('pos',[10 10 400 250]);
        hold on;
        Nbins = histc(PMEi,xvalmin:0.5:xvalmax);
        hb = bar(xvalmin:0.5:xvalmax,Nbins/sum(Nbins),'histc');
        set(hb,'FaceColor',[.8 .8 .8],'EdgeColor','black');
        set(gca,'FontSize',9);
        set(gca,'XTick',floor(xvalmin/2)*2:2:floor(xvalmax/2)*2); % even numbers
        %set(gca,'XMinorTick','On');
        set(gca,'XGrid','on')   % for easy comparison across graphs
        v = axis; axis([xvalmin xvalmax v(3) v(4)])
        ylabel('density'); xlabel('PME')
        box on;
        set(gca,'Layer','top') % display gridlines on top of graph    
        savefig(gcf, [outdir 'hist_PME_' s_asset]);    
        end 
        
        % truncated histograms
        h = figure('pos',[10 10 400 250]);
        hold on;
%         xvalmin = floor( min(min(alphai),min(GPMEi)) ); xvalmin = floor( min(xvalmin,min(PMEi)) );
%         xvalmax = ceil( max(max(alphai),max(GPMEi)) ); xvalmax = ceil( max(xvalmax,max(PMEi)) );
        Nbins = histc(alphai,[-inf -3:0.5:6 inf]);
        hb = bar(-3.5:0.5:6.5,Nbins/sum(Nbins),'histc');
        set(hb,'FaceColor',[.8 .8 .8],'EdgeColor','black');
        set(gca,'FontSize',9);
        set(gca,'XTick',[-3.5 -3 -2:2:6 6.5]);
        set(gca,'XTickLabel',{'<-3','','-2','0','2','4','','\geq6'});
        %set(gca,'XMinorTick','On');
        set(gca,'XGrid','on')   % for easy comparison across graphs
        v = axis; axis([-3.5 6.5 v(3) v(4)])
        ylabel('density'); xlabel('\alpha')
        box on;
        set(gca,'Layer','top') % display gridlines on top of graph    
        set(h,'PaperPositionMode','auto')
        savefig(h, [outdir 'hist_alpha_trunc_' s_asset '_' s_fm]);

        h = figure('pos',[10 10 400 250]);
        hold on;
        Nbins = histc(GPMEi,[-inf -3:0.5:6 inf]);
        hb = bar(-3.5:0.5:6.5,Nbins/sum(Nbins),'histc');
        set(hb,'FaceColor',[.8 .8 .8],'EdgeColor','black');
        set(gca,'FontSize',9);
        set(gca,'XTick',[-3.5 -3 -2:2:6 6.5]);
        set(gca,'XTickLabel',{'<-3','','-2','0','2','4','','\geq6'});
        %set(gca,'XMinorTick','On');
        set(gca,'XGrid','on')   % for easy comparison across graphs
        v = axis; axis([-3.5 6.5 v(3) v(4)])
        ylabel('density'); xlabel('GPME')
        box on;
        set(gca,'Layer','top') % display gridlines on top of graph    
        savefig(gcf, [outdir 'hist_GPME_trunc_' s_asset '_' s_fm]);    

        if (fm == 1) % only need to output PME for one factor model (as it does not vary with factor models)
        h = figure('pos',[10 10 400 250]);
        hold on;
        Nbins = histc(PMEi,[-inf -3:0.5:6 inf]);
        hb = bar(-3.5:0.5:6.5,Nbins/sum(Nbins),'histc');
        set(hb,'FaceColor',[.8 .8 .8],'EdgeColor','black');
        set(gca,'FontSize',9);
        set(gca,'XTick',[-3.5 -3 -2:2:6 6.5]);
        set(gca,'XTickLabel',{'<-3','','-2','0','2','4','','\geq6'});
        %set(gca,'XMinorTick','On');
        set(gca,'XGrid','on')   % for easy comparison across graphs
        v = axis; axis([-3.5 6.5 v(3) v(4)])
        ylabel('density'); xlabel('PME')
        box on;
        set(gca,'Layer','top') % display gridlines on top of graph    
        savefig(gcf, [outdir 'hist_PME_trunc_' s_asset]);    
        end         
        
        end % if (samp==1) || (samp == 5) || (samp==6)
        
        
        if (samp==1) || (samp == 5) || (samp==6)

        h = figure('pos',[10 10 600 500]);
        hold on;
        plot(alphai, GPMEi, 'ok');
        set(gca,'FontSize',9)
        v = axis; axis([min(v(1),v(3)) max(v(2),v(4)) min(v(1),v(3)) max(v(2),v(4))]);
        v = axis; line(v(1:2),v(1:2));
        box on;
        xlabel('\alpha'); ylabel('GPME'); grid on;
        set(h,'PaperPositionMode','auto')
        savefig(h, [outdir 'alpha_vs_GPME_' s_asset '_' s_fm]);    

        h = figure('pos',[10 10 600 500]);
        hold on;
        plot(alphai, PMEi, 'ok');
        set(gca,'FontSize',9)
        v = axis; axis([min(v(1),v(3)) max(v(2),v(4)) min(v(1),v(3)) max(v(2),v(4))]);
        v = axis; line(v(1:2),v(1:2));
        box on;
        xlabel('\alpha'); ylabel('PME'); grid on;
        set(h,'PaperPositionMode','auto')
        savefig(h, [outdir 'alpha_vs_PME_' s_asset '_' s_fm]);         

        h = figure('pos',[10 10 600 500]);
        hold on;
        plot(GPMEi, PMEi, 'ok');
        set(gca,'FontSize',9)
        v = axis; axis([min(v(1),v(3)) max(v(2),v(4)) min(v(1),v(3)) max(v(2),v(4))]);
        v = axis; line(v(1:2),v(1:2));
        box on;
        xlabel('GPME'); ylabel('PME'); grid on;
        set(h,'PaperPositionMode','auto')
        savefig(h, [outdir 'GPME_vs_PME_' s_asset '_' s_fm]);      
    
        end % if (samp==1) || (samp == 5) || (samp==6)
        
        
        
        if (samp==1) || (samp == 5) || (samp==6)

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Predictability: Performance of next fund(s) based on top/bottom funds 
        %                 of a given vintage
        % NB: For overlapping funds, the mechanical market overlap effects should 
        %     be taken out by GPME/alpha, so less susceptible to Korteweg-Sorensen critique
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        disp(' ')
        for topset = 1:6 % Choose top and bottom decile/quintile/tercile/5 funds for a given vintage year 
            switch topset
                case 1, s_topset = 'decile';
                case 2, s_topset = 'quintile';
                case 3, s_topset = 'quartile';
                case 4, s_topset = 'tercile';
                case 5, s_topset = '5funds';
                case 6, s_topset = '3funds';
            end
            eval(['nextperf_numfundsin_' s_topset ' = ones(2010-1985+1,3)*NaN;']);    % # funds in top/bottom for each vintage, by performance metric
            eval(['nextperf_overlap_top_' s_topset ' = ones(2010-1985+1,3,3,4,2)*NaN;']); % [vintageyear perfmetric(selection for top/bottom) perfmetric(followon funds) metric(mean/median/SD/N) nextfund/allsubsequentfunds]
            eval(['nextperf_overlap_bottom_' s_topset ' = ones(2010-1985+1,3,3,4,2)*NaN;']);
            eval(['nextperf_nonoverlap_top_' s_topset ' = ones(2010-1985+1,3,3,4,2)*NaN;']);
            eval(['nextperf_nonoverlap_bottom_' s_topset ' = ones(2010-1985+1,3,3,4,2)*NaN;']);            
            for yr = 1:2010-1985+1   % process by vintage year from 1985 to 2010 (need 2-3 years to raise next fund + time for reasonable perf measurement)                
                for m = 1:3 % cycle through performance metric used to sort funds
                    II = find(PEfund_vintage == 1985+yr-1);    % find all funds of vintage year "yr"
                    switch m
                        case 1, Perf = alphai(II); disp(['Predictability: top/bottom ' s_topset ' for vintage ' num2str(1985+yr-1) ', based on alpha' sum(~isnan(alphai))]);
                        case 2, Perf = GPMEi(II); disp(['Predictability: top/bottom ' s_topset ' for vintage ' num2str(1985+yr-1) ', based on GPME' sum(~isnan(GPMEi))]);
                        case 3, Perf = PMEi(II); disp(['Predictability: top/bottom ' s_topset ' for vintage ' num2str(1985+yr-1) ', based on PME' sum(~isnan(PMEi))]);
                    end
                    Z = sortrows([II Perf],2);  % sort by performance (ascending)
                    Z(isnan(Z(:,2)),:) = [];    % eliminate funds with unknown performance
                                      
                    if ~isempty(Z)
                        % choose top and bottom sets according to "topset"
                        switch topset
                            case 1, numsel = round(size(Z,1)/10);
                            case 2, numsel = round(size(Z,1)/5);
                            case 3, numsel = round(size(Z,1)/4);
                            case 4, numsel = round(size(Z,1)/3);
                            case 5, numsel = min(5, size(Z,1));
                            case 6, numsel = min(3, size(Z,1));
                        end
                        if numsel <1, numsel = 1; end
                        disp(numsel)
                        disp(size(Z))
                        Ztop = Z(end-numsel+1:end,1);
                        Zbot = Z(1:numsel,1);
                        Ztop_alpha = NaN(numsel,20);	% allow up to 20 follow-on funds 
                        Zbot_alpha = NaN(numsel,20);    % allow up to 20 follow-on funds
                        Ztop_GPME = NaN(numsel,20);     % allow up to 20 follow-on funds 
                        Zbot_GPME = NaN(numsel,20);     % allow up to 20 follow-on funds
                        Ztop_PME = NaN(numsel,20);      % allow up to 20 follow-on funds 
                        Zbot_PME = NaN(numsel,20);      % allow up to 20 follow-on funds
                        IItop_nonoverlap = zeros(numsel,20);    % indicators in Ztop_YYY that choose only the nonoverlapping follow-on funds for each GP
                        IIbot_nonoverlap = zeros(numsel,20);    % indicators in Zbot_YYY that choose only the nonoverlapping follow-on funds for each GP
                        IItop_nonoverlap_next = NaN(numsel,1);    % column numbers in Ztop_YYY that choose only the next nonoverlapping follow-on fund for each GP
                        IIbot_nonoverlap_next = NaN(numsel,1);    % column numbers in Zbot_YYY that choose only the next nonoverlapping follow-on fund for each GP
                        
                        disp(size(Ztop))
                        disp(size(Zbot))
                        
                        if ~isempty(Ztop) && ~isempty(Zbot)
                            eval(['nextperf_numfundsin_' s_topset '(yr,m) = numsel;']);

                            disp('numsel='); disp(numsel)
                            
                            for i = 1:numsel    
                                % go through each top firm
                                II = find((PEfund_firmid == PEfund_firmid(Ztop(i))) & (PEfund_seqnr > PEfund_seqnr(Ztop(i))));  % NB: don't need to select on same type, that's already implied in samp  
                                if ~isempty(II)
                                    Z = sortrows([PEfund_vintage(II) alphai(II) GPMEi(II) PMEi(II)],1);
                                    Z(isnan(sum(Z(:,2:end)')),:) = [];  % keep only follow-on funds for which *all* performance metrics are known (so fair comparison will be made)

                                    if ~isempty(Z)
                                        Ztop_alpha(i,1:size(Z,1)) = Z(:,2); 
                                        Ztop_GPME(i,1:size(Z,1)) = Z(:,3);    
                                        Ztop_PME(i,1:size(Z,1)) = Z(:,4);    

                                        % define indicators w/in Ztop_YYY that select only the nonoverlapping funds for each GP
                                        latestvint = 1985+yr-1;
                                        for j = 1:size(Z,1) % go through the sequence for this GP
                                            if Z(j,1) >= latestvint+10
                                                IItop_nonoverlap(i,j) = 1;
                                                if (latestvint == 1985+yr-1)
                                                    IItop_nonoverlap_next(i) = j;   % only indicate the first nonoverlapping fund after the current one
                                                end
                                                latestvint = Z(j,1);
                                            end
                                        end
                                    end
                                end

                                % same for each bottom firm
                                II = find((PEfund_firmid == PEfund_firmid(Zbot(i))) & (PEfund_seqnr > PEfund_seqnr(Zbot(i))));    % NB: don't need to select on same type, that's already implied in samp  
                                if ~isempty(II)
                                    Z = sortrows([PEfund_vintage(II) alphai(II) GPMEi(II) PMEi(II)],1);
                                    Z(isnan(sum(Z(:,2:end)')),:) = [];  % keep only follow-on funds for which *all* performance metrics are known (so fair comparison will be made)

                                    if ~isempty(Z)
                                        Zbot_alpha(i,1:size(Z,1)) = Z(:,2); 
                                        Zbot_GPME(i,1:size(Z,1)) = Z(:,3);    
                                        Zbot_PME(i,1:size(Z,1)) = Z(:,4);    

                                        % define indicators w/in Zbot_YYY that select only the nonoverlapping funds for each GP
                                        latestvint = 1985+yr-1;
                                        for j = 1:size(Z,1) % go through the sequence for this GP
                                            if Z(j,1) >= latestvint+10
                                                IIbot_nonoverlap(i,j) = 1;
                                                if (latestvint == 1985+yr-1)
                                                    IIbot_nonoverlap_next(i) = j;   % only indicate the first nonoverlapping fund after the current one
                                                end
                                                latestvint = Z(j,1);
                                            end
                                        end 
                                    end
                                end
                            end   % for i
                            % next top fund performance, overlapping funds allowed
                            eval(['nextperf_overlap_top_' s_topset '(yr,m,:,1,1) = nanmean([Ztop_alpha(:,1) Ztop_GPME(:,1) Ztop_PME(:,1)]);']);
                            eval(['nextperf_overlap_top_' s_topset '(yr,m,:,2,1) = nanmedian([Ztop_alpha(:,1) Ztop_GPME(:,1) Ztop_PME(:,1)]);']);
                            eval(['nextperf_overlap_top_' s_topset '(yr,m,:,3,1) = nanstd([Ztop_alpha(:,1) Ztop_GPME(:,1) Ztop_PME(:,1)]);']);
                            eval(['nextperf_overlap_top_' s_topset '(yr,m,:,4,1) = [sum(~isnan(Ztop_alpha(:,1))) sum(~isnan(Ztop_GPME(:,1))) sum(~isnan(Ztop_PME(:,1)))];']);   % N=1

                            % all top subsequent funds' performance, overlapping funds allowed (NB: also gives # subsequent funds raised)
                            eval(['nextperf_overlap_top_' s_topset '(yr,m,:,1,2) = [nanmean(reshape(Ztop_alpha,[],1)) nanmean(reshape(Ztop_GPME,[],1)) nanmean(reshape(Ztop_PME,[],1))];']);
                            eval(['nextperf_overlap_top_' s_topset '(yr,m,:,2,2) = [nanmedian(reshape(Ztop_alpha,[],1)) nanmedian(reshape(Ztop_GPME,[],1)) nanmedian(reshape(Ztop_PME,[],1))];']);
                            eval(['nextperf_overlap_top_' s_topset '(yr,m,:,3,2) = [nanstd(reshape(Ztop_alpha,[],1)) nanstd(reshape(Ztop_GPME,[],1)) nanstd(reshape(Ztop_PME,[],1))];']);
                            eval(['nextperf_overlap_top_' s_topset '(yr,m,:,4,2) = [sum(~isnan(reshape(Ztop_alpha,[],1))) sum(~isnan(reshape(Ztop_GPME,[],1))) sum(~isnan(reshape(Ztop_PME,[],1)))];']); 

                            II = sub2ind(size(Ztop_alpha),[1:size(Ztop_alpha,1)]',IItop_nonoverlap_next); II(isnan(II))=[]; % the "isnan" part properly drops firms with no followon funds
                            eval(['nextperf_nonoverlap_top_' s_topset '(yr,m,:,1,1) = [nanmean(Ztop_alpha(II)) nanmean(Ztop_GPME(II)) nanmean(Ztop_PME(II))];']); 
                            eval(['nextperf_nonoverlap_top_' s_topset '(yr,m,:,2,1) = [nanmedian(Ztop_alpha(II)) nanmedian(Ztop_GPME(II)) nanmedian(Ztop_PME(II))];']); 
                            eval(['nextperf_nonoverlap_top_' s_topset '(yr,m,:,3,1) = [nanstd(Ztop_alpha(II)) nanstd(Ztop_GPME(II)) nanstd(Ztop_PME(II))];']); 
                            eval(['nextperf_nonoverlap_top_' s_topset '(yr,m,:,4,1) = length(II)*ones(3,1);']); 

                            eval(['nextperf_nonoverlap_top_' s_topset '(yr,m,:,1,2) = [nanmean(Ztop_alpha(IItop_nonoverlap>0)) nanmean(Ztop_GPME(IItop_nonoverlap>0)) nanmean(Ztop_PME(IItop_nonoverlap>0))];']);
                            eval(['nextperf_nonoverlap_top_' s_topset '(yr,m,:,2,2) = [nanmedian(Ztop_alpha(IItop_nonoverlap>0)) nanmedian(Ztop_GPME(IItop_nonoverlap>0)) nanmedian(Ztop_PME(IItop_nonoverlap>0))];']);
                            eval(['nextperf_nonoverlap_top_' s_topset '(yr,m,:,3,2) = [nanstd(Ztop_alpha(IItop_nonoverlap>0)) nanstd(Ztop_GPME(IItop_nonoverlap>0)) nanstd(Ztop_PME(IItop_nonoverlap>0))];']);
                            eval(['nextperf_nonoverlap_top_' s_topset '(yr,m,:,4,2) = [sum(sum(IItop_nonoverlap>0))*ones(3,1)];']);


                            % next bottom fund performance, overlapping funds allowed
                            eval(['nextperf_overlap_bottom_' s_topset '(yr,m,:,1,1) = nanmean([Zbot_alpha(:,1) Zbot_GPME(:,1) Zbot_PME(:,1)]);']);
                            eval(['nextperf_overlap_bottom_' s_topset '(yr,m,:,2,1) = nanmedian([Zbot_alpha(:,1) Zbot_GPME(:,1) Zbot_PME(:,1)]);']);
                            eval(['nextperf_overlap_bottom_' s_topset '(yr,m,:,3,1) = nanstd([Zbot_alpha(:,1) Zbot_GPME(:,1) Zbot_PME(:,1)]);']);
                            eval(['nextperf_overlap_bottom_' s_topset '(yr,m,:,4,1) = [sum(~isnan(Zbot_alpha(:,1))) sum(~isnan(Zbot_GPME(:,1))) sum(~isnan(Zbot_PME(:,1)))];']);   % N=1

                            % all bottom subsequent funds' performance, overlapping funds allowed (NB: also gives # subsequent funds raised)
                            eval(['nextperf_overlap_bottom_' s_topset '(yr,m,:,1,2) = [nanmean(reshape(Zbot_alpha,[],1)) nanmean(reshape(Zbot_GPME,[],1)) nanmean(reshape(Zbot_PME,[],1))];']);
                            eval(['nextperf_overlap_bottom_' s_topset '(yr,m,:,2,2) = [nanmedian(reshape(Zbot_alpha,[],1)) nanmedian(reshape(Zbot_GPME,[],1)) nanmedian(reshape(Zbot_PME,[],1))];']);
                            eval(['nextperf_overlap_bottom_' s_topset '(yr,m,:,3,2) = [nanstd(reshape(Zbot_alpha,[],1)) nanstd(reshape(Zbot_GPME,[],1)) nanstd(reshape(Zbot_PME,[],1))];']);
                            eval(['nextperf_overlap_bottom_' s_topset '(yr,m,:,4,2) = [sum(~isnan(reshape(Zbot_alpha,[],1))) sum(~isnan(reshape(Zbot_GPME,[],1))) sum(~isnan(reshape(Zbot_PME,[],1)))];']); 

                            II = sub2ind(size(Zbot_alpha),[1:size(Zbot_alpha,1)]',IIbot_nonoverlap_next); II(isnan(II))=[]; % the "isnan" part properly drops firms with no followon funds
                            eval(['nextperf_nonoverlap_bottom_' s_topset '(yr,m,:,1,1) = [nanmean(Zbot_alpha(II)) nanmean(Zbot_GPME(II)) nanmean(Zbot_PME(II))];']); 
                            eval(['nextperf_nonoverlap_bottom_' s_topset '(yr,m,:,2,1) = [nanmedian(Zbot_alpha(II)) nanmedian(Zbot_GPME(II)) nanmedian(Zbot_PME(II))];']); 
                            eval(['nextperf_nonoverlap_bottom_' s_topset '(yr,m,:,3,1) = [nanstd(Zbot_alpha(II)) nanstd(Zbot_GPME(II)) nanstd(Zbot_PME(II))];']); 
                            eval(['nextperf_nonoverlap_bottom_' s_topset '(yr,m,:,4,1) = length(II)*ones(3,1);']); 

                            eval(['nextperf_nonoverlap_bottom_' s_topset '(yr,m,:,1,2) = [nanmean(Zbot_alpha(IIbot_nonoverlap>0)) nanmean(Zbot_GPME(IIbot_nonoverlap>0)) nanmean(Zbot_PME(IIbot_nonoverlap>0))];']);
                            eval(['nextperf_nonoverlap_bottom_' s_topset '(yr,m,:,2,2) = [nanmedian(Zbot_alpha(IIbot_nonoverlap>0)) nanmedian(Zbot_GPME(IIbot_nonoverlap>0)) nanmedian(Zbot_PME(IIbot_nonoverlap>0))];']);
                            eval(['nextperf_nonoverlap_bottom_' s_topset '(yr,m,:,3,2) = [nanstd(Zbot_alpha(IIbot_nonoverlap>0)) nanstd(Zbot_GPME(IIbot_nonoverlap>0)) nanstd(Zbot_PME(IIbot_nonoverlap>0))];']);
                            eval(['nextperf_nonoverlap_bottom_' s_topset '(yr,m,:,4,2) = [sum(sum(IIbot_nonoverlap>0))*ones(3,1)];']);   
                        end
                    end % if ~isempty(Z)
                end % for m
            end % for yr          
        end % for topset
        save([outdir 'Burgiss_nextperf_' s_asset '_' s_fm '_' datestr(date,'yyyymmdd')], 'nextperf_*');
                           
        
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Regressions
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % construct vintage dummies
        yrmin = 1985;    % make vintage year dummies (w/ one bucket for <=1985)
        yrmax = max(PEfund_vintage);
        vintagedums = zeros(length(alphai),yrmax-yrmin);
        for i = 1:length(alphai)
           if PEfund_vintage(i) > yrmin
               vintagedums(i,PEfund_vintage(i)-yrmin) = 1;
           end
        end
        II = sum(vintagedums); 
        vintagedums = vintagedums(:,II>0); % drop year-dummies with no obs

        % make size quartile dummies (cutoffs by decade: <1990, 1990s, 2000s)
        cutoffs_size1990 = zeros(3,3);  % decade(rows) by quartile (columns)
        II = PEfund_vintage < 1990;                              cutoffs_size1990(1,:) = prctile(PEfund_size1990(II),[25 50 75]);
        II = (PEfund_vintage >= 1990) & (PEfund_vintage < 2000); cutoffs_size1990(2,:) = prctile(PEfund_size1990(II),[25 50 75]);
        II = PEfund_vintage >=2000;                              cutoffs_size1990(3,:) = prctile(PEfund_size1990(II),[25 50 75]);
        % make dummies (smallest size quartile is the omitted category)
        size1990dums = zeros(length(alphai),3);
        for i = 1:length(alphai)
            if PEfund_vintage(i) < 1990, d = 1; elseif PEfund_vintage(i) < 2000, d = 2; else d = 3; end
            if PEfund_size1990(i) > cutoffs_size1990(d,3)
                size1990dums(i,3) = 1;
            elseif PEfund_size1990(i) > cutoffs_size1990(d,2)
                size1990dums(i,2) = 1;
            elseif PEfund_size1990(i) > cutoffs_size1990(d,1)
                size1990dums(i,1) =1;
            end
        end

        % compute alpha (GPME) relative to vintage year mean
        yrs = min(PEfund_vintage):max(PEfund_vintage);
        for i = 1:length(yrs)
            alpha_avg(i,1) = nanmean(alphai(PEfund_vintage==yrs(i)));
            GPME_avg(i,1)  = nanmean(GPMEi(PEfund_vintage==yrs(i)));
            PME_avg(i,1)   = nanmean(PMEi(PEfund_vintage==yrs(i)));
        end
        alphai_rel = ones(length(alphai),1)*NaN;
        GPMEi_rel  = ones(length(alphai),1)*NaN;
        PMEi_rel   = ones(length(alphai),1)*NaN;
        for i = 1:length(alphai)
            alphai_rel(i,1) = alphai(i) - alpha_avg(PEfund_vintage(i)-min(yrs)+1);
            GPMEi_rel(i,1)  = GPMEi(i) - GPME_avg(PEfund_vintage(i)-min(yrs)+1);
            PMEi_rel(i,1)   = PMEi(i) - PME_avg(PEfund_vintage(i)-min(yrs)+1);
        end


        %---------------------------------------------------------------------
        % Regressions of performance (alpha, GPME) on size (in $ Billions)
        %---------------------------------------------------------------------    
        disp(' '); disp('Performance-Size regressions; size quartiles + vintage FE')     
        X = [ones(length(alphai),1) size1990dums vintagedums];
        regrout(alphai, GPMEi, PMEi, X);

        disp(' '); disp('Performance-Size regressions; size quartiles + seq nr + vintage FE')
        % NB: IF INCLUDE VINTAGE DUMMIES THEN DELETING OBS (DUE TO MISSING SEQUENCE NRS) MAY MAKE SOME DUMMIES COLINEAR!!
        X = [ones(length(alphai),1) size1990dums log(PEfund_seqnr) vintagedums];
        II = ~isnan(PEfund_seqnr); X = X(II,:);
        regrout(alphai(II), GPMEi(II), PMEi(II), X);

        end % if (samp==1) || (samp == 5) || (samp==6)
    
    end % for fm
    
end % for samp

toc



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Done, do clean up: delete the temporary data file and save the diary
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if ~debug && exist('temp.mat','file')
    delete('temp.mat');    % delete temporary file, if it exists
end

diary off   % this saves the screen output



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Error handling: ensure diary is saved and contains basic error description
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
catch err 
    disp(' ')
    disp(err.identifier)
    disp(err.message)
    disp(['Line ' num2str(err.stack(end).line)]);
    
    % clean up the temporary data file
    if ~debug && exist('temp.mat','file')
        delete('temp.mat');    % delete temporary file, if it exists
    end
    
    diary off       % this saves the screen output
    
    rethrow(err); 
end
